#This script executes regressions to estimate 
#associations between first-generation prenatal exposure to nonattainment 
#and different measures of TSP exposure for first- and second-generation individuals.

rm(list=ls())
gc()
library(reldist)
library(ineq)
library(stargazer)
library(data.table)
library(dplyr)
library(dtplyr)
library(ggplot2)
library(stringdist)
library(lfe)
library(haven)
library(readr)
library(broom)
library(tidylog)
library(rdrobust)

# Set the working directory
setwd("/projects/opp_env/caa1970/")

# Source external R scripts for custom functions
source("Code/Child_Outcomes/child_outcomes_functions.R")
source("Code/rounding.r")

# Load the second-generation analysis dataset
load("Data/second_gen_college_analysis_data_jpemicro.rda")


Table_1 <- data.frame()

Tab1_1 <- felm(all_fullterm~ #LHS 
                 factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                 PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                 county_pop*year_trend + epop*I(year_trend^2) + 
                 total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                 tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                 state_year+county_factor+year_factor+month+
                 survey_year_by_year | 0 | #IV spec
                 county_factor, #Cluster
               data=regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm),!is.na(incollege),!is.na(parent_race), !is.na(PI_PC))) 

summary(Tab1_1)
Tab1_1$N
temp_n <-   rounding_rules_counts(Tab1_1$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), 
                                             !is.na(incollege),!is.na(parent_race), !is.na(PI_PC),
                                             nonattainment_infant == 0))$all_fullterm, na.rm=T ), 4)
temp_results <- tidy(Tab1_1) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = 1, column =1 ) #add variables for N and control mean, plus index the table number and column
Table_1 <- bind_rows(Table_1, temp_results) #Appended to big dataset

Tab1_2 <- felm(adult_TSP_HS~ #LHS 
                 factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                 PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                 county_pop*year_trend + epop*I(year_trend^2) + 
                 total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                 tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                 state_year+county_factor+year_factor+month+
                 survey_year_by_year | 0 | #IV spec
                 county_factor, #Cluster
               data=regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), !is.na(incollege), !is.na(parent_race), !is.na(PI_PC))) 

summary(Tab1_2)
Tab1_2$N
temp_n <-   rounding_rules_counts(Tab1_2$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), 
                                             !is.na(incollege),!is.na(parent_race), !is.na(PI_PC),
                                             nonattainment_infant == 0))$adult_TSP_HS, na.rm=T ), 4)
temp_results <- tidy(Tab1_2) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = 1, column =2 ) #add variables for N and control mean, plus index the table number and column
Table_1 <- bind_rows(Table_1, temp_results) #Appended to big dataset

Tab1_3 <- felm(child_fullterm~ #LHS 
                 factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                 PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                 county_pop*year_trend + epop*I(year_trend^2) + 
                 total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                 tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                 state_year+county_factor+year_factor+month+
                 survey_year_by_year | 0 | #IV spec
                 county_factor, #Cluster
               data=regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), !is.na(incollege), !is.na(parent_race), !is.na(PI_PC))) 

summary(Tab1_3)
Tab1_3$N
temp_n<-  rounding_rules_counts(Tab1_3$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), !is.na(incollege),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$child_fullterm, na.rm=T ),4)
temp_results <- tidy(Tab1_3) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = 1, column =3 ) #add variables for N and control mean, plus index the table number and column
Table_1 <- bind_rows(Table_1, temp_results) #Appended to big dataset


Tab1_4 <- felm(hs_TSP~ #LHS 
                 factor(parent_race)+factor(parent_female)+PI_PC*year_trend+
                 PI_PC*I(year_trend^2) +epop*year + total_transfers*year_trend + 
                 county_pop*year_trend + epop*I(year_trend^2) + 
                 total_transfers*I(year_trend^2) + county_pop*I(year_trend^2) +
                 tmax_fullterm+ tmin_fullterm+prec_fullterm+anyprec_fullterm + nonattainment_infant | #Controls
                 state_year+county_factor+year_factor+month+
                 survey_year_by_year | 0 | #IV spec
                 county_factor, #Cluster
               data=regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), !is.na(incollege), !is.na(parent_race), !is.na(PI_PC))) 

summary(Tab1_4)
Tab1_4$N
temp_n <- rounding_rules_counts(Tab1_4$N)
temp_ctrl <- signif(mean((regdata %>% filter(year%in%1969:1975, !is.na(all_fullterm), !is.na(incollege),!is.na(parent_race), !is.na(PI_PC),nonattainment_infant == 0))$hs_TSP, na.rm=T ),4)
temp_results <- tidy(Tab1_4) %>% filter(term == "nonattainment_infant" ) %>% #tidy regression, keep coef of interest
  mutate_each(funs(signif(.,4)), -term) %>% #round appropriately
  mutate(N=temp_n, ctrl_mean=temp_ctrl, table = 1, column =4 ) #add variables for N and control mean, plus index the table number and column
Table_1 <- bind_rows(Table_1, temp_results) #Appended to big dataset

write_csv(Table_1, "/projects/opp_env/caa1970/JPE_Micro/Output/Table 1/Table_1.csv")
